In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
from warnings import simplefilter
simplefilter(action='ignore',category=FutureWarning)
In [2]:
from google.colab import files
uploaded=files.upload()
Saving House data.csv to House data.csv
In [3]:
from google.colab import files
uploaded=files.upload()
Saving test.csv to test.csv
In [4]:
from google.colab import files
uploaded=files.upload()
Saving sample_submission.csv to sample_submission.csv
In [168]:
train=pd.read_csv("House data.csv")
test=pd.read_csv("test.csv")
In [6]:
print(train)
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \
0 1 60 RL 65.0 8450 Pave NaN Reg
1 2 20 RL 80.0 9600 Pave NaN Reg
2 3 60 RL 68.0 11250 Pave NaN IR1
3 4 70 RL 60.0 9550 Pave NaN IR1
4 5 60 RL 84.0 14260 Pave NaN IR1
... ... ... ... ... ... ... ... ...
1455 1456 60 RL 62.0 7917 Pave NaN Reg
1456 1457 20 RL 85.0 13175 Pave NaN Reg
1457 1458 70 RL 66.0 9042 Pave NaN Reg
1458 1459 20 RL 68.0 9717 Pave NaN Reg
1459 1460 20 RL 75.0 9937 Pave NaN Reg
LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \
0 Lvl AllPub ... 0 NaN NaN NaN 0
1 Lvl AllPub ... 0 NaN NaN NaN 0
2 Lvl AllPub ... 0 NaN NaN NaN 0
3 Lvl AllPub ... 0 NaN NaN NaN 0
4 Lvl AllPub ... 0 NaN NaN NaN 0
... ... ... ... ... ... ... ... ...
1455 Lvl AllPub ... 0 NaN NaN NaN 0
1456 Lvl AllPub ... 0 NaN MnPrv NaN 0
1457 Lvl AllPub ... 0 NaN GdPrv Shed 2500
1458 Lvl AllPub ... 0 NaN NaN NaN 0
1459 Lvl AllPub ... 0 NaN NaN NaN 0
MoSold YrSold SaleType SaleCondition SalePrice
0 2 2008 WD Normal 208500
1 5 2007 WD Normal 181500
2 9 2008 WD Normal 223500
3 2 2006 WD Abnorml 140000
4 12 2008 WD Normal 250000
... ... ... ... ... ...
1455 8 2007 WD Normal 175000
1456 2 2010 WD Normal 210000
1457 5 2010 WD Normal 266500
1458 4 2010 WD Normal 142125
1459 6 2008 WD Normal 147500
[1460 rows x 81 columns]
In [169]:
train_original=train.copy()
test_original=test.copy()
In [7]:
print(test)
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \
0 1461 20 RH 80.0 11622 Pave NaN Reg
1 1462 20 RL 81.0 14267 Pave NaN IR1
2 1463 60 RL 74.0 13830 Pave NaN IR1
3 1464 60 RL 78.0 9978 Pave NaN IR1
4 1465 120 RL 43.0 5005 Pave NaN IR1
... ... ... ... ... ... ... ... ...
1454 2915 160 RM 21.0 1936 Pave NaN Reg
1455 2916 160 RM 21.0 1894 Pave NaN Reg
1456 2917 20 RL 160.0 20000 Pave NaN Reg
1457 2918 85 RL 62.0 10441 Pave NaN Reg
1458 2919 60 RL 74.0 9627 Pave NaN Reg
LandContour Utilities ... ScreenPorch PoolArea PoolQC Fence \
0 Lvl AllPub ... 120 0 NaN MnPrv
1 Lvl AllPub ... 0 0 NaN NaN
2 Lvl AllPub ... 0 0 NaN MnPrv
3 Lvl AllPub ... 0 0 NaN NaN
4 HLS AllPub ... 144 0 NaN NaN
... ... ... ... ... ... ... ...
1454 Lvl AllPub ... 0 0 NaN NaN
1455 Lvl AllPub ... 0 0 NaN NaN
1456 Lvl AllPub ... 0 0 NaN NaN
1457 Lvl AllPub ... 0 0 NaN MnPrv
1458 Lvl AllPub ... 0 0 NaN NaN
MiscFeature MiscVal MoSold YrSold SaleType SaleCondition
0 NaN 0 6 2010 WD Normal
1 Gar2 12500 6 2010 WD Normal
2 NaN 0 3 2010 WD Normal
3 NaN 0 6 2010 WD Normal
4 NaN 0 1 2010 WD Normal
... ... ... ... ... ... ...
1454 NaN 0 6 2006 WD Normal
1455 NaN 0 4 2006 WD Abnorml
1456 NaN 0 9 2006 WD Abnorml
1457 Shed 700 7 2006 WD Normal
1458 NaN 0 11 2006 WD Normal
[1459 rows x 80 columns]
In [9]:
train.isnull().sum()[train.isnull().sum()>0]
Out[9]:
| 0 | |
|---|---|
| LotFrontage | 259 |
| Alley | 1369 |
| MasVnrType | 872 |
| MasVnrArea | 8 |
| BsmtQual | 37 |
| BsmtCond | 37 |
| BsmtExposure | 38 |
| BsmtFinType1 | 37 |
| BsmtFinType2 | 38 |
| Electrical | 1 |
| FireplaceQu | 690 |
| GarageType | 81 |
| GarageYrBlt | 81 |
| GarageFinish | 81 |
| GarageQual | 81 |
| GarageCond | 81 |
| PoolQC | 1453 |
| Fence | 1179 |
| MiscFeature | 1406 |
In [10]:
train.dtypes
Out[10]:
| 0 | |
|---|---|
| Id | int64 |
| MSSubClass | int64 |
| MSZoning | object |
| LotFrontage | float64 |
| LotArea | int64 |
| ... | ... |
| MoSold | int64 |
| YrSold | int64 |
| SaleType | object |
| SaleCondition | object |
| SalePrice | int64 |
81 rows × 1 columns
In [11]:
train.shape
Out[11]:
(1460, 81)
In [12]:
train['SalePrice'].value_counts()
Out[12]:
| count | |
|---|---|
| SalePrice | |
| 140000 | 20 |
| 135000 | 17 |
| 145000 | 14 |
| 155000 | 14 |
| 190000 | 13 |
| ... | ... |
| 223000 | 1 |
| 257000 | 1 |
| 282922 | 1 |
| 193879 | 1 |
| 137450 | 1 |
663 rows × 1 columns
In [13]:
train['SalePrice'].value_counts(normalize=True)
Out[13]:
| proportion | |
|---|---|
| SalePrice | |
| 140000 | 0.013699 |
| 135000 | 0.011644 |
| 145000 | 0.009589 |
| 155000 | 0.009589 |
| 190000 | 0.008904 |
| ... | ... |
| 223000 | 0.000685 |
| 257000 | 0.000685 |
| 282922 | 0.000685 |
| 193879 | 0.000685 |
| 137450 | 0.000685 |
663 rows × 1 columns
In [14]:
train['SalePrice'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='SalePrice')
Out[14]:
<Axes: title={'center': 'SalePrice'}, xlabel='SalePrice'>
In [15]:
train['MSSubClass'].value_counts(normalize=True).plot.bar(title='MSSubClass')
Out[15]:
<Axes: title={'center': 'MSSubClass'}, xlabel='MSSubClass'>
In [18]:
train['MSZoning'].value_counts(normalize=True).plot.bar()
Out[18]:
<Axes: xlabel='MSZoning'>
In [19]:
plt.figure(1)
plt.subplot(111)
train['LotFrontage'].value_counts(normalize=True).plot.box()
Out[19]:
<Axes: >
In [20]:
plt.figure(1)
plt.subplot(111)
sns.distplot(train['LotArea'])
Out[20]:
<Axes: xlabel='LotArea', ylabel='Density'>
In [21]:
train['Street'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Street')
Out[21]:
<Axes: title={'center': 'Street'}, xlabel='Street'>
In [22]:
train['Alley'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Alley')
Out[22]:
<Axes: title={'center': 'Alley'}, xlabel='Alley'>
In [23]:
train['LotShape'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='LotShape')
Out[23]:
<Axes: title={'center': 'LotShape'}, xlabel='LotShape'>
In [24]:
train['LandContour'].value_counts(normalize=True).plot.bar()
Out[24]:
<Axes: xlabel='LandContour'>
In [25]:
train['Utilities'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Utilities')
Out[25]:
<Axes: title={'center': 'Utilities'}, xlabel='Utilities'>
In [26]:
train['LotConfig'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='LotConfig')
Out[26]:
<Axes: title={'center': 'LotConfig'}, xlabel='LotConfig'>
In [27]:
train['LandSlope'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='LandSlope')
Out[27]:
<Axes: title={'center': 'LandSlope'}, xlabel='LandSlope'>
In [28]:
train['Neighborhood'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Neighborhood')
Out[28]:
<Axes: title={'center': 'Neighborhood'}, xlabel='Neighborhood'>
In [29]:
train['Condition1'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Condition1')
Out[29]:
<Axes: title={'center': 'Condition1'}, xlabel='Condition1'>
In [30]:
train['Condition2'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Condition2')
Out[30]:
<Axes: title={'center': 'Condition2'}, xlabel='Condition2'>
In [32]:
train['BldgType'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='Bldgtype')
Out[32]:
<Axes: title={'center': 'Bldgtype'}, xlabel='BldgType'>
In [33]:
train['HouseStyle'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='HouseStyle')
Out[33]:
<Axes: title={'center': 'HouseStyle'}, xlabel='HouseStyle'>
In [34]:
train['OverallQual'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='OverallQual')
Out[34]:
<Axes: title={'center': 'OverallQual'}, xlabel='OverallQual'>
In [36]:
train['FullBath'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='FullBath')
Out[36]:
<Axes: title={'center': 'FullBath'}, xlabel='FullBath'>
In [37]:
train['HalfBath'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='HalfBath')
Out[37]:
<Axes: title={'center': 'HalfBath'}, xlabel='HalfBath'>
In [38]:
train['PoolArea'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='PoolArea')
Out[38]:
<Axes: title={'center': 'PoolArea'}, xlabel='PoolArea'>
In [41]:
train['SaleCondition'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='SaleCondition')
Out[41]:
<Axes: title={'center': 'SaleCondition'}, xlabel='SaleCondition'>
In [44]:
train['SaleType'].value_counts(normalize=True).plot.bar(figsize=(5,5),title='SaleType')
Out[44]:
<Axes: title={'center': 'SaleType'}, xlabel='SaleType'>
In [45]:
MSZoning=pd.crosstab(train['MSZoning'],train['SalePrice'])
MSZoning.div(MSZoning.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[45]:
<Axes: xlabel='MSZoning'>
In [46]:
MSSubClass=pd.crosstab(train['MSSubClass'],train['SalePrice'])
MSSubClass.div(MSSubClass.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[46]:
<Axes: xlabel='MSSubClass'>
In [47]:
SaleType=pd.crosstab(train['SaleType'],train['SalePrice'])
SaleType.div(SaleType.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[47]:
<Axes: xlabel='SaleType'>
In [48]:
SaleCondition=pd.crosstab(train['SaleCondition'],train['SalePrice'])
SaleCondition.div(SaleCondition.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[48]:
<Axes: xlabel='SaleCondition'>
In [49]:
PoolArea=pd.crosstab(train['PoolArea'],train['SalePrice'])
PoolArea.div(PoolArea.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[49]:
<Axes: xlabel='PoolArea'>
In [50]:
Electrical=pd.crosstab(train['Electrical'],train['SalePrice'])
Electrical.div(Electrical.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[50]:
<Axes: xlabel='Electrical'>
In [51]:
HouseStyle=pd.crosstab(train['HouseStyle'],train['SalePrice'])
HouseStyle.div(HouseStyle.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[51]:
<Axes: xlabel='HouseStyle'>
In [53]:
Heating=pd.crosstab(train['Heating'],train['SalePrice'])
Heating.div(Heating.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
Out[53]:
<Axes: xlabel='Heating'>
In [55]:
train.groupby('SalePrice')['PoolArea'].mean().plot.bar()
Out[55]:
<Axes: xlabel='SalePrice'>
In [56]:
bins=[0,2500,4000,6000,81000]
group=['Low','Average','High','VeryHigh']
train['Area_bin']=pd.cut(train['LotArea'],bins,labels=group)
Area_bin=pd.crosstab(train['Area_bin'],train['SalePrice'])
Area_bin.div(Area_bin.sum(1).astype(float),axis=0).plot(kind='bar',stacked=True,figsize=(5,5))
plt.xlabel('LotArea')
plt.ylabel('Percentage')
Out[56]:
Text(0, 0.5, 'Percentage')
In [57]:
train=train.drop(['Area_bin'],axis=1)
In [131]:
train.isnull().sum()[train.isnull().sum()>0]
Out[131]:
| 0 | |
|---|---|
| LotFrontage | 259 |
| Alley | 1369 |
| MasVnrType | 872 |
| MasVnrArea | 8 |
| BsmtQual | 37 |
| BsmtCond | 37 |
| BsmtExposure | 38 |
| BsmtFinType1 | 37 |
| BsmtFinType2 | 38 |
| Electrical | 1 |
| FireplaceQu | 690 |
| GarageType | 81 |
| GarageYrBlt | 81 |
| GarageFinish | 81 |
| GarageQual | 81 |
| GarageCond | 81 |
| PoolQC | 1453 |
| Fence | 1179 |
| MiscFeature | 1406 |
In [132]:
train.dtypes[train.isnull().sum()>0]
Out[132]:
| 0 | |
|---|---|
| LotFrontage | float64 |
| Alley | object |
| MasVnrType | object |
| MasVnrArea | float64 |
| BsmtQual | object |
| BsmtCond | object |
| BsmtExposure | object |
| BsmtFinType1 | object |
| BsmtFinType2 | object |
| Electrical | object |
| FireplaceQu | object |
| GarageType | object |
| GarageYrBlt | float64 |
| GarageFinish | object |
| GarageQual | object |
| GarageCond | object |
| PoolQC | object |
| Fence | object |
| MiscFeature | object |
In [170]:
train['LotFrontage'].fillna(train['LotFrontage'].mode()[0],inplace=True)
train['Alley'].fillna(train['Alley'].mode()[0],inplace=True)
train['MasVnrType'].fillna(train['MasVnrType'].mode()[0],inplace=True)
train['MasVnrArea'].fillna(train['MasVnrArea'].mode()[0],inplace=True)
train['BsmtQual'].fillna(train['BsmtQual'].mode()[0],inplace=True)
train['BsmtCond'].fillna(train['BsmtCond'].mode()[0],inplace=True)
train['BsmtExposure'].fillna(train['BsmtExposure'].mode()[0],inplace=True)
train['BsmtFinType1'].fillna(train['BsmtFinType1'].mode()[0],inplace=True)
train['BsmtFinType2'].fillna(train['BsmtFinType2'].mode()[0],inplace=True)
train['Electrical'].fillna(train['Electrical'].mode()[0],inplace=True)
train['FireplaceQu'].fillna(train['FireplaceQu'].mode()[0],inplace=True)
train['GarageType'].fillna(train['GarageType'].mode()[0],inplace=True)
train['GarageYrBlt'].fillna(train['GarageYrBlt'].mode()[0],inplace=True)
train['GarageQual'].fillna(train['GarageQual'].mode()[0],inplace=True)
train['GarageCond'].fillna(train['GarageCond'].mode()[0],inplace=True)
train['PoolQC'].fillna(train['PoolQC'].mode()[0],inplace=True)
train['Fence'].fillna(train['Fence'].mode()[0],inplace=True)
train['MiscFeature'].fillna(train['MiscFeature'].mode()[0],inplace=True)
train['GarageFinish'].fillna(train['GarageFinish'].mode()[0],inplace=True)
In [158]:
train.isnull().sum()
Out[158]:
| 0 | |
|---|---|
| Id | 0 |
| MSSubClass | 0 |
| MSZoning | 0 |
| LotFrontage | 0 |
| LotArea | 0 |
| ... | ... |
| MoSold | 0 |
| YrSold | 0 |
| SaleType | 0 |
| SaleCondition | 0 |
| SalePrice | 0 |
81 rows × 1 columns
In [147]:
test.isnull().sum()[test.isnull().sum()>0]
Out[147]:
| 0 | |
|---|---|
| MSZoning | 4 |
| LotFrontage | 227 |
| Alley | 1352 |
| Utilities | 2 |
| Exterior1st | 1 |
| Exterior2nd | 1 |
| MasVnrType | 894 |
| MasVnrArea | 15 |
| BsmtQual | 44 |
| BsmtCond | 45 |
| BsmtExposure | 44 |
| BsmtFinType1 | 42 |
| BsmtFinSF1 | 1 |
| BsmtFinType2 | 42 |
| BsmtFinSF2 | 1 |
| BsmtUnfSF | 1 |
| TotalBsmtSF | 1 |
| BsmtFullBath | 2 |
| BsmtHalfBath | 2 |
| KitchenQual | 1 |
| Functional | 2 |
| FireplaceQu | 730 |
| GarageType | 76 |
| GarageYrBlt | 78 |
| GarageFinish | 78 |
| GarageCars | 1 |
| GarageArea | 1 |
| GarageQual | 78 |
| GarageCond | 78 |
| PoolQC | 1456 |
| Fence | 1169 |
| MiscFeature | 1408 |
| SaleType | 1 |
In [171]:
test['LotFrontage'].fillna(test['LotFrontage'].mode()[0],inplace=True)
test['MSZoning'].fillna(test['MSZoning'].mode()[0],inplace=True)
test['Alley'].fillna(test['Alley'].mode()[0],inplace=True)
test['Utilities'].fillna(test['Utilities'].mode()[0],inplace=True)
test['Exterior1st'].fillna(test['Exterior1st'].mode()[0],inplace=True)
test['Exterior2nd'].fillna(test['Exterior2nd'].mode()[0],inplace=True)
test['BsmtFinSF1'].fillna(test['BsmtFinSF1'].mode()[0],inplace=True)
test['BsmtFinSF2'].fillna(test['BsmtFinSF2'].mode()[0],inplace=True)
test['BsmtUnfSF'].fillna(test['BsmtUnfSF'].mode()[0],inplace=True)
test['TotalBsmtSF'].fillna(test['TotalBsmtSF'].mode()[0],inplace=True)
test['MasVnrType'].fillna(test['MasVnrType'].mode()[0],inplace=True)
test['MasVnrArea'].fillna(test['MasVnrArea'].mode()[0],inplace=True)
test['BsmtQual'].fillna(test['BsmtQual'].mode()[0],inplace=True)
test['BsmtCond'].fillna(test['BsmtCond'].mode()[0],inplace=True)
test['BsmtExposure'].fillna(test['BsmtExposure'].mode()[0],inplace=True)
test['BsmtFinType1'].fillna(test['BsmtFinType1'].mode()[0],inplace=True)
test['BsmtFinType2'].fillna(test['BsmtFinType2'].mode()[0],inplace=True)
test['FireplaceQu'].fillna(test['FireplaceQu'].mode()[0],inplace=True)
test['GarageType'].fillna(test['GarageType'].mode()[0],inplace=True)
test['GarageYrBlt'].fillna(test['GarageYrBlt'].mode()[0],inplace=True)
test['GarageQual'].fillna(test['GarageQual'].mode()[0],inplace=True)
test['GarageCond'].fillna(test['GarageCond'].mode()[0],inplace=True)
test['GarageCars'].fillna(test['GarageCars'].mode()[0],inplace=True)
test['GarageArea'].fillna(test['GarageArea'].mode()[0],inplace=True)
test['SaleType'].fillna(test['SaleType'].mode()[0],inplace=True)
test['Fence'].fillna(test['Fence'].mode()[0],inplace=True)
test['PoolQC'].fillna(test['PoolQC'].mode()[0],inplace=True)
test['MiscFeature'].fillna(test['MiscFeature'].mode()[0],inplace=True)
test['GarageFinish'].fillna(test['GarageFinish'].mode()[0],inplace=True)
test['Functional'].fillna(test['Functional'].mode()[0],inplace=True)
test['BsmtFullBath'].fillna(test['BsmtFullBath'].mode()[0],inplace=True)
test['BsmtHalfBath'].fillna(test['BsmtHalfBath'].mode()[0],inplace=True)
test['KitchenQual'].fillna(test['KitchenQual'].mode()[0],inplace=True)
In [149]:
test.isnull().sum()
Out[149]:
| 0 | |
|---|---|
| Id | 0 |
| MSSubClass | 0 |
| MSZoning | 0 |
| LotFrontage | 0 |
| LotArea | 0 |
| ... | ... |
| MiscVal | 0 |
| MoSold | 0 |
| YrSold | 0 |
| SaleType | 0 |
| SaleCondition | 0 |
80 rows × 1 columns
In [172]:
train=train.drop('Id',axis=1)
test=test.drop('Id',axis=1)
X=train.drop('SalePrice',axis=1)
Y=train.SalePrice
# Apply one-hot encoding separately
X = pd.get_dummies(X)
test = pd.get_dummies(test)
# Align columns - add missing columns to test set with fill_value=0
test_aligned = test.reindex(columns=X.columns, fill_value=0)
In [151]:
fig=plt.figure()
ax=plt.subplot(111)
cax=ax.matshow(train.corr(),vmin=1,vmax=1)
fig.colorbar(cax)
plt.show()
In [173]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
model=LinearRegression()
model.fit(x_train,y_train)
LinearRegression()
Out[173]:
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [174]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
In [175]:
pred_test=model.predict(test_aligned)
In [176]:
submission=pd.read_csv("sample_submission.csv")
In [177]:
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
In [178]:
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("linear.csv")
In [ ]:
from google.colab import files
files.download("linear.csv")
In [180]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
model=KNeighborsRegressor()
model.fit(x_train,y_train)
KNeighborsRegressor()
Out[180]:
KNeighborsRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor()
In [181]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
In [182]:
pred_test=model.predict(test_aligned)
In [183]:
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
In [184]:
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("KNN.csv")
In [185]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
model=SVR()
model.fit(x_train,y_train)
SVR()
Out[185]:
SVR()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SVR()
In [187]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
pred_test=model.predict(test_aligned)
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("SVR.csv")
In [193]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
model=DecisionTreeRegressor()
model.fit(x_train,y_train)
DecisionTreeRegressor()
Out[193]:
DecisionTreeRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeRegressor()
In [194]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
pred_test=model.predict(test_aligned)
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("DT.csv")
In [195]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
model=RandomForestRegressor()
model.fit(x_train,y_train)
RandomForestRegressor()
Out[195]:
RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestRegressor()
In [196]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
pred_test=model.predict(test_aligned)
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("Rf.csv")
In [197]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
model=AdaBoostRegressor()
model.fit(x_train,y_train)
AdaBoostRegressor()
Out[197]:
AdaBoostRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
AdaBoostRegressor()
In [ ]:
In [198]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
pred_test=model.predict(test_aligned)
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("AB.csv")
In [ ]:
In [199]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error
model=ExtraTreesRegressor()
model.fit(x_train,y_train)
ExtraTreesRegressor()
Out[199]:
ExtraTreesRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
ExtraTreesRegressor()
In [200]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
pred_test=model.predict(test_aligned)
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("ET.csv")
In [201]:
from sklearn.model_selection import train_test_split
x_train,x_cv,y_train,y_cv=train_test_split(X,Y,test_size=0.3)
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
model=GradientBoostingRegressor()
model.fit(x_train,y_train)
GradientBoostingRegressor()
Out[201]:
GradientBoostingRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GradientBoostingRegressor()
In [202]:
pred_cv=model.predict(x_cv)
mse=mean_squared_error(y_cv,pred_cv)
pred_test=model.predict(test_aligned)
submission['SalePrice']=pred_test
submission['Id']=test_original['Id']
pd.DataFrame(submission,columns=['Id','SalePrice']).to_csv("GB.csv")
In [203]:
submission.to_csv('linear.csv',index=False)
from google.colab import files
files.download("linear.csv")
In [204]:
submission.to_csv("KNN.csv",index=False)
from google.colab import files
files.download("KNN.csv")
In [205]:
submission.to_csv("SVR.csv",index=False)
from google.colab import files
files.download("SVR.csv")
In [206]:
submission.to_csv("DT.csv",index=False)
from google.colab import files
files.download("DT.csv")
In [207]:
submission.to_csv("Rf.csv",index=False)
from google.colab import files
files.download("Rf.csv")
In [208]:
submission.to_csv("ET.csv",index=False)
from google.colab import files
files.download("ET.csv")
In [209]:
submission.to_csv("AB.csv",index=False)
from google.colab import files
files.download("AB.csv")
In [210]:
submission.to_csv("GB.csv",index=False)
from google.colab import files
files.download("GB.csv")
In [213]:
importances=pd.Series(model.feature_importances_,index=X.columns)
importances.plot(kind='barh',figsize=(80,80))
Out[213]:
<Axes: >
In [ ]:
from google.colab import files
uploaded=files.upload()
In [ ]: